winsafe\kernel\utilities/
w_string.rs

1use std::cmp::Ordering;
2
3use crate::co;
4use crate::decl::*;
5use crate::kernel::ffi;
6
/// Stores a `[u16]` buffer for a null-terminated
/// [Unicode UTF-16](https://learn.microsoft.com/en-us/windows/win32/intl/unicode-in-the-windows-api)
/// wide string natively used by Windows.
///
/// Uses the
/// [Short String Optimization](https://joellaity.com/2020/01/31/string.html)
/// technique for faster performance.
///
/// This struct is mostly used internally by the library, as a bridge between
/// Windows and Rust strings.
#[derive(Default, Clone)]
pub struct WString {
	/// Underlying storage: an inline array, a heap vector, or nothing at all.
	buf: Buffer,
}
21
22impl std::fmt::Display for WString {
23	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
24		let txt = match self.buf.to_string_checked() {
25			Ok(t) => t,
26			Err(e) => format!("PARSING ERROR: {}", e.to_string()),
27		};
28		std::fmt::Display::fmt(&txt, f)
29	}
30}
31impl std::fmt::Debug for WString {
32	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33		std::fmt::Debug::fmt(&self.buf, f)
34	}
35}
36
37impl std::cmp::PartialEq for WString {
38	fn eq(&self, other: &Self) -> bool {
39		self.cmp(other) == Ordering::Equal
40	}
41}
42impl std::cmp::Eq for WString {}
43
44impl std::cmp::PartialOrd for WString {
45	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
46		let ord = unsafe { ffi::lstrcmpW(self.as_ptr(), other.as_ptr()) };
47		Some(if ord < 0 {
48			Ordering::Less
49		} else if ord > 0 {
50			Ordering::Greater
51		} else {
52			Ordering::Equal
53		})
54	}
55}
56impl std::cmp::Ord for WString {
57	fn cmp(&self, other: &Self) -> Ordering {
58		self.partial_cmp(other).unwrap()
59	}
60}
61
62impl WString {
	/// Number of `u16` elements of the inline array used by the internal
	/// [Short String Optimization](https://joellaity.com/2020/01/31/string.html).
	pub const SSO_LEN: usize = Buffer::SSO_LEN;
66
67	/// Stores an UTF-16 null-terminated string from an optional [`&str`](str).
68	///
69	/// If `s` is `None` or the string is empty, no allocation is made.
70	#[must_use]
71	pub fn from_opt_str(s: Option<impl AsRef<str>>) -> Self {
72		Self { buf: Buffer::from_opt_str(s) }
73	}
74
75	/// Stores an UTF-16 null-terminated string from a [`&str`](str).
76	///
77	/// If the string is empty, no allocation is made.
78	#[must_use]
79	pub fn from_str(s: impl AsRef<str>) -> Self {
80		Self { buf: Buffer::from_str(s, ForceHeap::No) }
81	}
82
83	/// Stores an UTF-16 null-terminated string from a [`&str`](str), bypassing
84	/// [Short String Optimization](https://joellaity.com/2020/01/31/string.html)
85	/// – that is, forcing the internal allocation on the heap. This should be
86	/// rarely needed.
87	///
88	/// If the string is empty, no allocation is made.
89	#[must_use]
90	pub fn from_str_force_heap(s: impl AsRef<str>) -> Self {
91		Self { buf: Buffer::from_str(s, ForceHeap::Yes) }
92	}
93
94	/// Stores a series of UTF-16 null-terminated strings. The buffer will end
95	/// with two terminating nulls – that means further retrieval operations
96	/// will "see" only the first string.
97	///
98	/// If the slice is empty, no allocation is made.
99	///
100	/// This method can be used as an escape hatch to interoperate with other
101	/// libraries.
102	#[must_use]
103	pub fn from_str_vec(v: &[impl AsRef<str>]) -> Self {
104		Self { buf: Buffer::from_str_vec(v) }
105	}
106
107	/// Stores an UTF-16 null-terminated string by copying from a buffer,
108	/// specifying the number of chars to be copied.
109	///
110	/// The `src` buffer doesn't need to be null-terminated.
111	#[must_use]
112	pub fn from_wchars_count(src: *const u16, num_chars: usize) -> Self {
113		Self {
114			buf: Buffer::from_wchars_count(src, num_chars),
115		}
116	}
117
118	/// Stores an UTF-16 null-terminated string by copying from a
119	/// null-terminated buffer. The string length is retrieved with
120	/// [`lstrlen`](https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-lstrlenw).
121	///
122	/// # Safety
123	///
124	/// Be sure the string is null-terminated, otherwise `lstrlen` will get
125	/// lost, possibly reading an invalid memory location.
126	#[must_use]
127	pub unsafe fn from_wchars_nullt(src: *const u16) -> Self {
128		Self {
129			buf: unsafe { Buffer::from_wchars_nullt(src) },
130		}
131	}
132
133	/// Stores an UTF-16 null-terminated string by copying from a slice.
134	///
135	/// The `src` slice doesn't need to be null-terminated.
136	#[must_use]
137	pub fn from_wchars_slice(src: &[u16]) -> Self {
138		Self { buf: Buffer::from_wchars_slice(src) }
139	}
140
141	/// Constructs a new, empty `WString`. No allocation is made.
142	#[must_use]
143	pub const fn new() -> Self {
144		Self { buf: Buffer::new() }
145	}
146
147	/// Allocates an UTF-16 buffer with an specific length. All elements will be
148	/// set to zero.
149	#[must_use]
150	pub fn new_alloc_buf(sz: usize) -> Self {
151		Self {
152			buf: Buffer::new_alloc_buf(sz, ForceHeap::No),
153		}
154	}
155
156	/// Returns a mutable
157	/// [`LPWSTR`](https://learn.microsoft.com/en-us/windows/win32/learnwin32/working-with-strings)
158	/// pointer to the internal UTF-16 string buffer, to be passed to native
159	/// Win32 functions. This is useful to receive strings.
160	///
161	/// # Panics
162	///
163	/// Panics if the buffer was not allocated.
164	///
165	/// # Safety
166	///
167	/// Be sure to alloc enough room, otherwise a buffer overrun may occur.
168	#[must_use]
169	pub const unsafe fn as_mut_ptr(&mut self) -> *mut u16 {
170		unsafe { self.buf.as_mut_ptr() }
171	}
172
173	/// Returns a mutable slice to the internal UTF-16 string buffer.
174	#[must_use]
175	pub const fn as_mut_slice(&mut self) -> &mut [u16] {
176		self.buf.as_mut_slice()
177	}
178
179	/// Returns a
180	/// [`LPCWSTR`](https://learn.microsoft.com/en-us/windows/win32/learnwin32/working-with-strings)
181	/// pointer to the internal UTF-16 string buffer, to be passed to native
182	/// Win32 functions.
183	///
184	/// If the buffer was not allocated, returns a null pointer.
185	#[must_use]
186	pub const fn as_ptr(&self) -> *const u16 {
187		self.buf.as_ptr()
188	}
189
190	/// Returns a slice to the internal UTF-16 string buffer.
191	#[must_use]
192	pub const fn as_slice(&self) -> &[u16] {
193		self.buf.as_slice()
194	}
195
196	/// Returns the size of the allocated internal buffer, in `u16` wide chars.
197	/// Note that the terminating null, if existing, is also counted.
198	///
199	/// If the buffer was not allocated yet, returns zero.
200	#[must_use]
201	pub const fn buf_len(&self) -> usize {
202		self.buf.buf_len()
203	}
204
205	/// Copies the content into an external buffer. A terminating null will be
206	/// appended.
207	///
208	/// If `dest` is smaller, the string will be truncated.
209	///
210	/// If `dest` has 1 element, it will receive only the terminating null.
211	pub fn copy_to_slice(&self, dest: &mut [u16]) {
212		if !dest.is_empty() {
213			let usable_len = dest.len() - 1; // leave room for terminating null
214			self.as_slice()
215				.iter()
216				.zip(dest[..usable_len].iter_mut())
217				.for_each(|(src, dest)| *dest = *src);
218			dest[usable_len..]
219				.iter_mut()
220				.for_each(|dest| *dest = 0x0000); // fill the rest with zero
221		}
222	}
223
224	/// Fills the entire buffer with zeros.
225	pub fn fill_with_zero(&mut self) {
226		self.as_mut_slice().iter_mut().for_each(|ch| *ch = 0x0000);
227	}
228
229	/// Returns `true` if the internal buffer has been allocated.
230	#[must_use]
231	pub const fn is_allocated(&self) -> bool {
232		self.buf.is_allocated()
233	}
234
235	/// Converts into [`String`](std::string::String) by calling
236	/// [`String::from_utf16`](std::string::String::from_utf16). An uncallocated
237	/// will simply be converted into an empty string.
238	///
239	/// This method is useful if you're parsing raw data which may contain
240	/// invalid characters. If you're dealing with a string known to be valid,
241	/// [`to_string`](std::string::ToString::to_string) is more practical.
242	#[must_use]
243	pub fn to_string_checked(&self) -> Result<String, std::string::FromUtf16Error> {
244		self.buf.to_string_checked()
245	}
246
247	/// Wrapper to
248	/// [`lstrlen`](https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-lstrlenw).
249	///
250	/// Returns the number of [`u16`] characters stored in the internal buffer,
251	/// not counting the terminating null.
252	#[must_use]
253	pub fn str_len(&self) -> usize {
254		unsafe { ffi::lstrlenW(self.buf.as_ptr()) as _ }
255	}
256
257	/// Converts the string to lower case, in-place. Wrapper to
258	/// [`CharLower`](https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-charlowerw).
259	pub fn make_lowercase(&mut self) {
260		unsafe {
261			ffi::CharLowerW(self.as_mut_ptr());
262		}
263	}
264
265	/// Converts the string to upper case, in-place. Wrapper to
266	/// [`CharUpper`](https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-charupperw).
267	pub fn make_uppercase(&mut self) {
268		unsafe {
269			ffi::CharUpperW(self.as_mut_ptr());
270		}
271	}
272
273	/// Guesses the encoding with [`Encoding::guess`](crate::Encoding::guess)
274	/// and parses the data as a string.
275	///
276	/// If you're sure the data has UTF-8 encoding, you can also use the
277	/// built-in [`String::from_utf8`](std::string::String::from_utf8).
278	///
279	/// To serialize the string back into UTF-8 bytes, use the built-in
280	/// [`String::into_bytes`](std::string::String::into_bytes).
281	///
282	/// # Examples
283	///
284	/// Usually the fastest way to read the text from a file is by mapping its
285	/// contents in memory with [`FileMapped`](crate::FileMapped), then parsing:
286	///
287	/// ```no_run
288	/// use winsafe::{self as w, prelude::*};
289	///
290	/// let file_in = w::FileMapped::open(
291	///     "C:\\Temp\\foo.txt",
292	///     w::FileAccess::ExistingReadOnly,
293	/// )?;
294	/// let wstr = w::WString::parse(file_in.as_slice())?;
295	/// let str_contents = wstr.to_string();
296	/// # w::SysResult::Ok(())
297	/// ```
298	#[must_use]
299	pub fn parse(data: &[u8]) -> SysResult<Self> {
300		let mut data = data;
301		if data.is_empty() {
302			return Ok(Self::new()); // nothing to parse
303		}
304
305		let (encoding, sz_bom) = Encoding::guess(data);
306		data = &data[sz_bom..]; // skip BOM, if any
307
308		Ok(Self::from_wchars_slice(&match encoding {
309			Encoding::Ansi => Self::parse_ansi(data),
310			Encoding::Win1252 => MultiByteToWideChar(co::CP::WINDOWS_1252, co::MBC::NoValue, data)?,
311			Encoding::Utf8 => MultiByteToWideChar(co::CP::UTF8, co::MBC::NoValue, data)?,
312			Encoding::Utf16be => Self::parse_utf16(data, true),
313			Encoding::Utf16le => Self::parse_utf16(data, false),
314			Encoding::Utf32be
315			| Encoding::Utf32le
316			| Encoding::Scsu
317			| Encoding::Bocu1
318			| Encoding::Unknown => panic!("Encoding {} not implemented.", encoding),
319		}))
320	}
321
322	fn parse_ansi(data: &[u8]) -> Vec<u16> {
323		data.iter()
324			.take_while(|ch| **ch != 0x0000) // ignore terminating null, if any
325			.map(|ch| *ch as u16) // raw u8 to u16 conversion
326			.collect()
327	}
328
329	fn parse_utf16(data: &[u8], is_big_endian: bool) -> Vec<u16> {
330		let data = if data.len() % 2 == 1 {
331			&data[..data.len() - 1] // if odd number of bytes, discard last one
332		} else {
333			data
334		};
335
336		data.chunks(2)
337			.take_while(|ch2| **ch2 != [0x00, 0x00]) // ignore terminating null, if any
338			.map(|ch2| {
339				if is_big_endian {
340					u16::from_be_bytes(ch2.try_into().unwrap())
341				} else {
342					u16::from_le_bytes(ch2.try_into().unwrap())
343				}
344			})
345			.collect()
346	}
347}
348
/// Tells the buffer constructors whether the inline array may be used.
#[derive(PartialEq, Eq)]
enum ForceHeap {
	/// Always allocate on the heap, regardless of the requested length.
	Yes,
	/// Use the inline array whenever the requested length fits into it.
	No,
}
354
/// Internal storage of a `WString`.
enum Buffer {
	/// Inline array with a fixed number of `SSO_LEN` elements.
	Stack([u16; Self::SSO_LEN]),
	/// Heap-allocated vector.
	Heap(Vec<u16>),
	/// No storage at all; this is what an empty string is made of.
	Unallocated,
}
360
361impl Default for Buffer {
362	fn default() -> Self {
363		Self::Unallocated
364	}
365}
366
367impl Clone for Buffer {
368	fn clone(&self) -> Self {
369		match self {
370			Self::Unallocated => Self::Unallocated,
371			_ => {
372				let mut new_self = Self::new_alloc_buf(self.buf_len(), ForceHeap::No);
373				self.as_slice()
374					.iter()
375					.zip(new_self.as_mut_slice())
376					.for_each(|(src, dest)| *dest = *src);
377				new_self
378			},
379		}
380	}
381}
382
383impl std::fmt::Debug for Buffer {
384	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
385		let txt = match self.to_string_checked() {
386			Ok(t) => t,
387			Err(e) => format!("PARSING ERROR: {}", e.to_string()),
388		};
389		write!(
390			f,
391			"{}",
392			match self {
393				Self::Stack(_) => format!("STACK({}) \"{}\"", self.buf_len(), txt),
394				Self::Heap(_) => format!("HEAP({}) \"{}\"", self.buf_len(), txt),
395				Self::Unallocated => "UNALLOCATED \"\"".to_owned(),
396			}
397		)
398	}
399}
400
401impl Buffer {
	/// Number of `u16` elements of the inline `Stack` array. A buffer needing
	/// more elements than this is placed on the heap.
	pub const SSO_LEN: usize = 20;
403
404	#[must_use]
405	fn from_opt_str(s: Option<impl AsRef<str>>) -> Self {
406		match s {
407			Some(s) => Self::from_str(s, ForceHeap::No),
408			None => Self::Unallocated,
409		}
410	}
411
412	#[must_use]
413	fn from_str(s: impl AsRef<str>, force_heap: ForceHeap) -> Self {
414		let s_len = s.as_ref().encode_utf16().count();
415		if s_len == 0 {
416			Self::Unallocated
417		} else {
418			let num_chars = s_len + 1; // room for terminating null
419			let mut new_self = Self::new_alloc_buf(num_chars, force_heap);
420			s.as_ref()
421				.encode_utf16()
422				.zip(new_self.as_mut_slice())
423				.for_each(|(src, dest)| *dest = src);
424			new_self
425		}
426	}
427
428	#[must_use]
429	fn from_str_vec(v: &[impl AsRef<str>]) -> Self {
430		if v.is_empty() {
431			return Self::Unallocated; // no elements yield an empty buffer
432		}
433
434		let tot_chars = v.iter() // number of chars of all strings, including terminating nulls
435			.fold(0, |tot, s| tot + s.as_ref().chars().count() + 1) // include terminating null
436			+ 1; // double terminating null
437		let mut new_self = Self::new_alloc_buf(tot_chars, ForceHeap::No);
438		v.iter()
439			.map(|s| {
440				s.as_ref().encode_utf16().chain(std::iter::once(0x0000)) // append terminating null on each string
441			})
442			.flatten()
443			.zip(new_self.as_mut_slice())
444			.for_each(|(src, dest)| *dest = src);
445		new_self
446	}
447
448	#[must_use]
449	fn from_wchars_count(src: *const u16, num_chars: usize) -> Self {
450		if src.is_null() || num_chars == 0 {
451			Self::Unallocated
452		} else {
453			Self::from_wchars_slice(unsafe { std::slice::from_raw_parts(src, num_chars) })
454		}
455	}
456
457	#[must_use]
458	unsafe fn from_wchars_nullt(src: *const u16) -> Self {
459		Self::from_wchars_count(src, unsafe { ffi::lstrlenW(src) as _ })
460	}
461
462	#[must_use]
463	fn from_wchars_slice(src: &[u16]) -> Self {
464		if src.is_empty() {
465			Self::Unallocated
466		} else {
467			let num_chars = src
468				.iter()
469				.take_while(|ch| **ch != 0x0000) // skip terminating null, if any
470				.count() + 1; // room for terminating null
471			let mut new_self = Self::new_alloc_buf(num_chars, ForceHeap::No);
472			src.iter()
473				.take_while(|ch| **ch != 0x0000) // skip terminating null, if any
474				.zip(new_self.as_mut_slice())
475				.for_each(|(src, dest)| *dest = *src);
476			new_self
477		}
478	}
479
480	#[must_use]
481	const fn new() -> Self {
482		Self::Unallocated
483	}
484
485	#[must_use]
486	fn new_alloc_buf(num_chars: usize, force_heap: ForceHeap) -> Self {
487		if num_chars == 0 {
488			Self::Unallocated
489		} else if force_heap == ForceHeap::Yes || num_chars > Self::SSO_LEN {
490			Self::Heap(vec![0x0000; num_chars])
491		} else {
492			Self::Stack([0x0000; Self::SSO_LEN])
493		}
494	}
495
496	#[must_use]
497	const unsafe fn as_mut_ptr(&mut self) -> *mut u16 {
498		match self {
499			Self::Stack(arr) => arr.as_mut_ptr(),
500			Self::Heap(vec) => {
501				if vec.is_empty() {
502					std::ptr::null_mut() // because empty Vec returns garbage as ptr
503				} else {
504					vec.as_mut_ptr()
505				}
506			},
507			Self::Unallocated => panic!("Trying to use an unallocated WString buffer."),
508		}
509	}
510
511	#[must_use]
512	const fn as_mut_slice(&mut self) -> &mut [u16] {
513		match self {
514			Self::Stack(arr) => arr,
515			Self::Heap(vec) => vec.as_mut_slice(),
516			Self::Unallocated => &mut [],
517		}
518	}
519
520	#[must_use]
521	const fn as_ptr(&self) -> *const u16 {
522		match self {
523			Self::Stack(arr) => arr.as_ptr(),
524			Self::Heap(vec) => {
525				if vec.is_empty() {
526					std::ptr::null() // because empty Vec returns garbage as ptr
527				} else {
528					vec.as_ptr()
529				}
530			},
531			Self::Unallocated => std::ptr::null(),
532		}
533	}
534
535	#[must_use]
536	const fn as_slice(&self) -> &[u16] {
537		match self {
538			Self::Stack(arr) => arr,
539			Self::Heap(vec) => vec.as_slice(),
540			Self::Unallocated => &[],
541		}
542	}
543
544	#[must_use]
545	const fn buf_len(&self) -> usize {
546		match self {
547			Self::Stack(arr) => arr.len(),
548			Self::Heap(vec) => vec.len(),
549			Self::Unallocated => 0,
550		}
551	}
552
553	#[must_use]
554	const fn is_allocated(&self) -> bool {
555		match self {
556			Self::Unallocated => false,
557			_ => true,
558		}
559	}
560
561	#[must_use]
562	fn to_string_checked(&self) -> Result<String, std::string::FromUtf16Error> {
563		match self {
564			Self::Unallocated => Ok(String::new()),
565			_ => String::from_utf16(
566				&self
567					.as_slice()
568					.into_iter()
569					.take_while(|ch| **ch != 0x0000) // remove all trailing zeros
570					.map(|ch| *ch)
571					.collect::<Vec<_>>(),
572			),
573		}
574	}
575}